data_RQ1a <- read.csv("RQ1a_receptions_by_OH.csv", header = TRUE, sep = ",")
data_RQ1b <- read.csv("RQ1b_recept_perc_OH.csv", header = TRUE, sep = ",")
data_RQ2 <- read.csv("RQ2_recep_attacks_kp.csv", header = TRUE, sep = ",")
data_RQ3 <- read.csv("RQ3_OH_proportion_final2.csv", header = TRUE, sep = ",")
library("tidyverse")
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.4.2 ✔ purrr 1.0.1
## ✔ tibble 3.2.1 ✔ dplyr 1.1.1
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.0.1 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library("ggplot2")
library("Hmisc")
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
library("plyr")
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:Hmisc':
##
## is.discrete, summarize
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:purrr':
##
## compact
library("dplyr")
library("aod")
##
## Attaching package: 'aod'
## The following object is masked from 'package:survival':
##
## rats
library("scales")
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
library("ggpubr")
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
##
## mutate
# showing that standard linear model is not a good idea for matches won or sets won (patterns for sets or matches won are very similar)
# per set
lmod1 = lm(Sets.Won ~ Percent.Receptions.by.OH, data_RQ1a)
with(data_RQ1a, plot(Percent.Receptions.by.OH, Sets.Won))
abline(lmod1) # fit is a bad idea
summary(lmod1)
##
## Call:
## lm(formula = Sets.Won ~ Percent.Receptions.by.OH, data = data_RQ1a)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.32490 -0.12885 0.00158 0.15498 0.31557
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2066 0.2346 0.881 0.391
## Percent.Receptions.by.OH 0.5416 0.4743 1.142 0.270
##
## Residual standard error: 0.195 on 16 degrees of freedom
## Multiple R-squared: 0.07535, Adjusted R-squared: 0.01756
## F-statistic: 1.304 on 1 and 16 DF, p-value: 0.2703
# per match
lmod2 = lm(Matches.Won ~ Percent.Receptions.by.OH, data_RQ1a)
with(data_RQ1a, plot(Percent.Receptions.by.OH, Matches.Won))
abline(lmod2) # fit is a bad idea
summary(lmod2)
##
## Call:
## lm(formula = Matches.Won ~ Percent.Receptions.by.OH, data = data_RQ1a)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.53909 -0.20533 0.00735 0.22768 0.55388
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1460 0.4029 0.362 0.722
## Percent.Receptions.by.OH 0.6514 0.8148 0.799 0.436
##
## Residual standard error: 0.3349 on 16 degrees of freedom
## Multiple R-squared: 0.03841, Adjusted R-squared: -0.02169
## F-statistic: 0.6391 on 1 and 16 DF, p-value: 0.4357
## diagnostics - checking for violations of constant variance, normality, and linearity and possible outliers.
plot(lmod1)
termplot(lmod1, partial=TRUE,terms = 1)
## finding the correlation between the % of receptions by OH and the % of sets or matches won.
# per set
cor(data_RQ1a$Percent.Receptions.by.OH,data_RQ1a$Sets.Won)
## [1] 0.2745047
# per match
cor(data_RQ1a$Percent.Receptions.by.OH,data_RQ1a$Matches.Won)
## [1] 0.1959897
ANSWER: Although the normality and linearity assumptions seem reasonable, and there are no influential outliers, there seems to be non-constant variance in the data (see trend in plot of fitted values vs square root of standardized residuals). Therefore, a standard linear regression may not be the best fit for the data. Nonetheless, a standard linear model shows that a one-percent increase in reception attempts (by OH per team) is associated with less than one percent increase in sets and matches won, respectively. However, none of these results are statistically significant.
mean(data_RQ1b$recep_percent_OH)
## [1] 56.36913
#per set
glmod0 = glm(won_set ~ 1, data = data_RQ1b, family = "binomial")
glmod = glm(won_set ~ recep_percent_OH, data = data_RQ1b, family = "binomial")
summary(glmod)
##
## Call:
## glm(formula = won_set ~ recep_percent_OH, family = "binomial",
## data = data_RQ1b)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.238 -1.230 1.120 1.126 1.136
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.0980828 0.1783255 0.550 0.582
## recep_percent_OH 0.0004393 0.0029633 0.148 0.882
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1419.9 on 1026 degrees of freedom
## Residual deviance: 1419.8 on 1025 degrees of freedom
## AIC: 1423.8
##
## Number of Fisher Scoring iterations: 3
anova(glmod0, glmod, test = "Chisq") # likelihood ratio
## Analysis of Deviance Table
##
## Model 1: won_set ~ 1
## Model 2: won_set ~ recep_percent_OH
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1026 1419.9
## 2 1025 1419.8 1 0.021974 0.8822
# per match
glmod02 = glm(won_match ~ 1, data = data_RQ1b, family = "binomial")
glmod2 = glm(won_match ~ recep_percent_OH, data = data_RQ1b, family = "binomial")
summary(glmod2)
##
## Call:
## glm(formula = won_match ~ recep_percent_OH, family = "binomial",
## data = data_RQ1b)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.271 -1.239 1.096 1.116 1.154
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.055083 0.178374 0.309 0.757
## recep_percent_OH 0.001619 0.002966 0.546 0.585
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1418.2 on 1026 degrees of freedom
## Residual deviance: 1417.9 on 1025 degrees of freedom
## AIC: 1421.9
##
## Number of Fisher Scoring iterations: 3
anova(glmod02, glmod2, test = "Chisq") # likelihood ratio
## Analysis of Deviance Table
##
## Model 1: won_match ~ 1
## Model 2: won_match ~ recep_percent_OH
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1026 1418.2
## 2 1025 1417.9 1 0.29811 0.5851
newdata = data.frame(recept_percent_OH= 24) # choose a value of receptions
logodds1 = (coef(glmod2)[1] + coef(glmod2)[2]*newdata) # log of winning at first value of receptions
odds1 = exp(logodds1) # calculate the odds
newdata2 = newdata = data.frame(recep_percent_OH = 25) # choose a value of receptions
logodds2 = (coef(glmod2)[1] + coef(glmod2)[2]*newdata2) # log of winning at second value of receptions
odds2 = exp(logodds2) # calculate the odds
# check interpretation
odds2/odds1
## recep_percent_OH
## 1 1.001621
exp(coef(glmod2)[2])
## recep_percent_OH
## 1.001621
ANSWER: A logistic regression model does not perform better than an-intercept model (that considers the mean only).There is no statistical evidence that a higher percentage of reception attempts is associated with a higher likelihood of sets or matches won. The odds of winning at a percent of receptions of 56 is the same as the odds of winning at an average level of reception attempts of 55%.
#
data_RQ2$recep_attack_count_mean <- data_RQ2$recep_attack_count - mean(data_RQ2$recep_attack_count)
lmod2 = lm(kill_percent ~ recep_attack_count_mean, data_RQ2)
with(data_RQ2, plot(recep_attack_count_mean, kill_percent))
abline(lmod2)
summary(lmod2)
##
## Call:
## lm(formula = kill_percent ~ recep_attack_count_mean, data = data_RQ2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.9314 -3.0083 -0.1308 2.4841 23.6778
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.82105 0.18007 126.734 <2e-16 ***
## recep_attack_count_mean -0.01004 0.01183 -0.849 0.396
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.299 on 568 degrees of freedom
## Multiple R-squared: 0.001268, Adjusted R-squared: -0.0004905
## F-statistic: 0.721 on 1 and 568 DF, p-value: 0.3962
# Computing Pearson's product-moment correlation
cor.test(data_RQ2$recep_attack_count, data_RQ2$kill_percent, method = c("pearson", "kendall", "spearman"))
##
## Pearson's product-moment correlation
##
## data: data_RQ2$recep_attack_count and data_RQ2$kill_percent
## t = -0.84914, df = 568, p-value = 0.3962
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.11738845 0.04665545
## sample estimates:
## cor
## -0.03560635
ANSWER: A one-percent increase in receptions and attack attempts is associated with a reduction of .02 units of kill percentage. However, these results could easily occur by chance alone. The kill percentage at an average value of reception and attack attempts (of 22.8%) is 22.82%.
shapiro.test(data_RQ3$recep_prop)
##
## Shapiro-Wilk normality test
##
## data: data_RQ3$recep_prop
## W = 0.99001, p-value = 0.3375
ggplot(data = data_RQ3) +
geom_boxplot(mapping = aes(x = OH, y = recep_prop))
# changing this to a scatter plot
options(repr.plot.width=8, repr.plot.height=6)
p_scatter = ggplot(data = data_RQ3) +
geom_point(mapping = aes(x = OH, y = recep_prop)) +
theme_bw()
p_scatter
##This shows that the majority of receptions (proportion) lands between 0.1 and 0.3. A few outliers over 0.45
## first looking at all players (OH) holistically
theme_set(theme_bw(base_size=16))
dx <- density(data_RQ3$recep_prop)
data_RQ3%>%
ggplot(aes(x=recep_prop)) +
geom_density(alpha = 0.3)+
geom_vline(xintercept=5, size=1, color="red") +
xlim(c(min(dx$x), # X-axis limits
c(max(dx$x)))) +
labs(x= "OH proportion for every OH",
subtitle="Proportion of receptions by OH relative to their teams' receptions",
caption="Data Source: Data Volley")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## Warning: Please use `linewidth` instead.
## Warning: Removed 1 rows containing missing values (`geom_vline()`).
## data seem to follow a normal distribution.
##then looking just at teams
theme_set(theme_bw(base_size=10))
dx <- density(data_RQ3$recep_prop)
data_RQ3%>%
ggplot(aes(x=recep_prop, fill = team)) +
geom_density(alpha = 0.3)+
geom_vline(xintercept=5, size=1, color="red") +
xlim(c(min(dx$x), # X-axis limits
c(max(dx$x)))) +
labs(x= "OH proportion for every team",
subtitle="Proportion of receptions by OH for each team'",
caption="Data Source: Data Volley")
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Groups with fewer than two data points have been dropped.
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 1 rows containing missing values (`geom_vline()`).
## different distribution patterns for every team
p = ggplot(data_RQ3, aes(x = category, y = recep_prop))
p = p + geom_violin(trim = FALSE, bw = 0.5)
p = p + geom_boxplot(with = 0.2, col = c("#CFB87C", "#565A5C"), fill = c("#CFB87C", "#565A5C"), alpha = 0.25)
## Warning in geom_boxplot(with = 0.2, col = c("#CFB87C", "#565A5C"), fill =
## c("#CFB87C", : Ignoring unknown parameters: `with`
p = p + geom_dotplot(binaxis = 'y', stackdir = 'center', dotsize = 0.6, alpha = 0.2)
p = p + stat_summary(fun.data = mean_sdl, color = c("#CFB87C", "#565A5C"), alpha = 0.5)
p = p +xlab("Team category") + ylab("Proportion or receptions by OH")
p = p + theme_bw()
p
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.
# Histogram with density plot
ggplot(data_RQ3, aes(x=recep_prop, fill=category)) +
geom_histogram(aes(y=..density..), colour="black", fill="white")+
geom_density(alpha=.2, fill="#FF6666") +
geom_vline(aes(xintercept=mean(recep_prop)),
color="blue", linetype="dashed", size=1)
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## Warning: Please use `after_stat(density)` instead.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## data follows a normal distribution with mean around 0.20
ggplot(data_RQ3, aes(x=recep_prop, fill=category, color=category)) +
geom_histogram(aes(y=..density..), colour="black", fill="white")+
geom_density(alpha=.2, fill="#FF6666") +
geom_vline(aes(xintercept=mean(recep_prop)),
color="blue", linetype="dashed", size=1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
min.mean.sd.max <- function(x) {
r <- c(min(x), mean(x) - 0.5*sd(x), mean(x), mean(x) + 0.5*sd(x), max(x))
names(r) <- c("ymin", "lower", "middle", "upper", "ymax")
r
}
p1 <- ggplot(aes(y = recep_prop, x = factor(category)), data = data_RQ3)
p1 <- p1 + stat_summary(fun.data = min.mean.sd.max, geom = "boxplot") + geom_jitter(position=position_jitter(width=.2), size=3) + ggtitle("Boxplot with mean + 0.5 SD, 95%CI, max and min values.") + xlab("Team category") + ylab("Proportion of receptions by OH")
p1
m <- mean(data_RQ3$recep_prop)
sd <- sd(data_RQ3$recep_prop)
low <- m -sd
high <- m + sd
low
## [1] 0.1170786
high
## [1] 0.3125625
## low threshold = 0.12
## high threshold = 0.31
min.mean.sd.max <- function(x) {
r <- c(min(x), mean(x) - sd(x), mean(x), mean(x) + sd(x), max(x))
names(r) <- c("ymin", "lower", "middle", "upper", "ymax")
r
}
p1 <- ggplot(aes(y = recep_prop, x = factor(team)), data = data_RQ3)
p1 <- p1 + stat_summary(fun.data = min.mean.sd.max, geom = "boxplot") + geom_jitter(position=position_jitter(width=.2), size=3) + ggtitle("Boxplot with mean + 1 SD, 95%CI, max and min values.") + xlab("Team") + ylab("Proportion of receptions and attacks by OH")
p1
## Warning: Removed 2 rows containing missing values (`geom_segment()`).
## Warning: Removed 2 rows containing missing values (`geom_segment()`).
## Warning: Removed 2 rows containing missing values (`geom_segment()`).
## Warning: Removed 2 rows containing missing values (`geom_segment()`).
## Warning: Removed 2 rows containing missing values (`geom_segment()`).
## Warning: Removed 2 rows containing missing values (`geom_segment()`).
## Warning: Removed 2 rows containing missing values (`geom_segment()`).
p = ggplot(data_RQ3, aes(x=category, y=recep_prop));
p = p + geom_violin(trim = FALSE)
p = p+ geom_boxplot(with = 0.2, col = c("#CFB87C", "#565A5C"), fill = c("#CFB87C", "#565A5C"),alpha = 0.25)
## Warning in geom_boxplot(with = 0.2, col = c("#CFB87C", "#565A5C"), fill =
## c("#CFB87C", : Ignoring unknown parameters: `with`
p = p + theme_minimal()
p = p + coord_flip()
p = p +xlab("Team category") + ylab("Proportion of receptions by OH")
p = p + theme_bw()
p
ggplot(data = data_RQ3) +
geom_boxplot(mapping = aes(x = OH, y = attacks_prop))
# changing this to a scatter plot
options(repr.plot.width=8, repr.plot.height=6)
p_scatter = ggplot(data = data_RQ3) +
geom_point(mapping = aes(x = OH, y = attacks_prop)) +
theme_bw()
p_scatter
##This shows that the majority of receptions (proportion) lands between 0.2 and 0.6. A few outliers below 0.1
## first looking at all players (OH) holistically
theme_set(theme_bw(base_size=16))
dx <- density(data_RQ3$attacks_prop)
data_RQ3%>%
ggplot(aes(x=attacks_prop)) +
geom_density(alpha = 0.3)+
geom_vline(xintercept=5, size=1, color="red") +
xlim(c(min(dx$x), # X-axis limits
c(max(dx$x)))) +
labs(x= "OH proportion for every OH",
subtitle="Proportion of attacks by OH relative to their teams' attacks",
caption="Data Source: Data Volley")
## Warning: Removed 1 rows containing missing values (`geom_vline()`).
## data seem to follow a normal distribution.
##then looking just at teams
theme_set(theme_bw(base_size=8))
dx <- density(data_RQ3$attacks_prop)
data_RQ3%>%
ggplot(aes(x=attacks_prop, fill = team)) +
geom_density(alpha = 0.3)+
geom_vline(xintercept=5, size=1, color="red") +
xlim(c(min(dx$x), # X-axis limits
c(max(dx$x)))) +
labs(x= "OH proportion for every team",
subtitle="Proportion of attacks by OH for each team'",
caption="Data Source: Data Volley")
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Groups with fewer than two data points have been dropped.
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 1 rows containing missing values (`geom_vline()`).
## different distribution patterns for every team
p = ggplot(data_RQ3, aes(x = category, y = attacks_prop))
p = p + geom_violin(trim = FALSE, bw = 0.5)
p = p + geom_boxplot(with = 0.2, col = c("#CFB87C", "#565A5C"), fill = c("#CFB87C", "#565A5C"), alpha = 0.25)
## Warning in geom_boxplot(with = 0.2, col = c("#CFB87C", "#565A5C"), fill =
## c("#CFB87C", : Ignoring unknown parameters: `with`
p = p + geom_dotplot(binaxis = 'y', stackdir = 'center', dotsize = 0.6, alpha = 0.2)
p = p + stat_summary(fun.data = mean_sdl, color = c("#CFB87C", "#565A5C"), alpha = 0.5)
p = p +xlab("Team category") + ylab("Proportion or attacks by OH")
p = p + theme_bw()
p
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.
# Histogram with density plot
ggplot(data_RQ3, aes(x=attacks_prop, fill=category)) +
geom_histogram(aes(y=..density..), colour="black", fill="white")+
geom_density(alpha=.2, fill="#FF6666") +
geom_vline(aes(xintercept=mean(attacks_prop)),
color="blue", linetype="dashed", size=1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## data follows a normal distribution with mean around 0.4
ggplot(data_RQ3, aes(x=attacks_prop, fill=category, color=category)) +
geom_histogram(aes(y=..density..), colour="black", fill="white")+
geom_density(alpha=.2, fill="#FF6666") +
geom_vline(aes(xintercept=mean(attacks_prop)),
color="blue", linetype="dashed", size=1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
min.mean.sd.max <- function(x) {
r <- c(min(x), mean(x) - 0.5*sd(x), mean(x), mean(x) + 0.5*sd(x), max(x))
names(r) <- c("ymin", "lower", "middle", "upper", "ymax")
r
}
p1 <- ggplot(aes(y = attacks_prop, x = factor(category)), data = data_RQ3)
p1 <- p1 + stat_summary(fun.data = min.mean.sd.max, geom = "boxplot") + geom_jitter(position=position_jitter(width=.2), size=3) + ggtitle("Boxplot with mean +- 0.5 SD, 95%CI, max and min values.") + xlab("Team category") + ylab("Proportion of attacks by OH")
p1
ma <- mean(data_RQ3$attacks_prop)
sda <- sd(data_RQ3$attacks_prop)
lowa <- ma -sda
higha <- ma + sda
lowa
## [1] 0.2754938
higha
## [1] 0.5308011
## low threshold = 0.28
## high threshold = 0.53
min.mean.sd.max <- function(x) {
r <- c(min(x), mean(x) - sd(x), mean(x), mean(x) + sd(x), max(x))
names(r) <- c("ymin", "lower", "middle", "upper", "ymax")
r
}
p1 <- ggplot(aes(y = attacks_prop, x = factor(team)), data = data_RQ3)
p1 <- p1 + stat_summary(fun.data = min.mean.sd.max, geom = "boxplot") + geom_jitter(position=position_jitter(width=.2), size=3) + ggtitle("Boxplot with mean + 1 SD, 95%CI, max and min values.") + xlab("Team") + ylab("Proportion of attacks by OH")
p1
## Warning: Removed 2 rows containing missing values (`geom_segment()`).
## Warning: Removed 2 rows containing missing values (`geom_segment()`).
## Warning: Removed 2 rows containing missing values (`geom_segment()`).
## Warning: Removed 2 rows containing missing values (`geom_segment()`).
## Warning: Removed 2 rows containing missing values (`geom_segment()`).
## Warning: Removed 2 rows containing missing values (`geom_segment()`).
## Warning: Removed 2 rows containing missing values (`geom_segment()`).
p = ggplot(data_RQ3, aes(x=category, y=attacks_prop));
p = p + geom_violin(trim = FALSE)
p = p+ geom_boxplot(with = 0.2, col = c("#CFB87C", "#565A5C"), fill = c("#CFB87C", "#565A5C"),alpha = 0.25)
## Warning in geom_boxplot(with = 0.2, col = c("#CFB87C", "#565A5C"), fill =
## c("#CFB87C", : Ignoring unknown parameters: `with`
p = p + theme_minimal()
p = p + coord_flip()
p = p +xlab("Team category") + ylab("Proportion of attacks by OH")
p = p + theme_bw()
p